### Purpose of this script: To generate a clean data frame with
### information on all the towns considered for inclusion in the
### scheme with dummy variables giving the details of the Westminster
### areas containing in whole or in part the relevant town.

### This is carried out separately for each matching routine:
### any/all, primary (largest overlap), and area-weighted averaging.

### Load libraries
library(sf)
library(tidyverse)
library(rio)


### ###############################################
### Prep work
### ###############################################

### Towns under consideration: two CSV files with the same columns,
### stacked into a single data frame.
dat <- read.csv("../data/towns_deal.csv")
more <- read.csv("../data/towns_high_priority.csv")
dat <- rbind(dat, more)

### Built-up area (BUA) boundaries and BUA sub-division boundaries.
### Each layer is cut down to one identifier column, renamed to
### ONSCode so that the two layers can be stacked.
bua <- read_sf("../data/", "Built-up_Areas__December_2011__Boundaries_V2")
bua <- dplyr::select(bua, ONSCode = BUA11CD)

bua_sd <- read_sf("../data/", "Built-up_Area_Sub_Divisions__December_2011__Boundaries")
bua_sd <- dplyr::select(bua_sd, ONSCode = buasd11cd)

bua <- rbind(bua, bua_sd)

### Attach a boundary to each town. Every town row is kept
### (all.y = TRUE); boundaries with no matching town are dropped.
dat <- merge(x = bua,
             y = dat,
             by = "ONSCode",
             all.x = FALSE,
             all.y = TRUE)

### Westminster constituency boundaries
wmin <- read_sf("../data/",
                "Westminster_Parliamentary_Constituencies__December_2017__Boundaries_UK")

### Constituency-level election results. Missing 2017 values for the
### Greens, UKIP and the Liberal Democrats are set to zero.
res2017 <- rio::import("../data/BES-2019-General-Election-results-file-v1.0.xlsx")
res2017 <- mutate(res2017,
                  Green17 = replace_na(Green17, 0),
                  UKIP17 = replace_na(UKIP17, 0),
                  LD17 = replace_na(LD17, 0))

### Add the results to the constituency boundaries; only
### constituencies present in both sources are retained.
wmin <- merge(x = wmin,
              y = res2017,
              by.x = "pcon17cd",
              by.y = "ONSConstID",
              all.x = FALSE,
              all.y = FALSE)


### ###############################################
### "Any"/max matching
### ###############################################

### For each town (row of dat), the row indices of every constituency
### in wmin that it intersects.
inter <- st_intersects(dat, wmin)

### One summary row per town, taken over all intersecting
### constituencies:
###   ConWinner1 - TRUE if any of them has Winner17 == "Conservative"
###   ConWinner2 - share of them with Winner17 == "Conservative"
###   Con17 ... Green17 - unweighted means of the 2017 figures
###   ConMaj - mean Con17 minus the largest of the other four means
aux <- bind_rows(lapply(inter, function(idx) {
    overlapping <- st_drop_geometry(wmin[idx, ])
    overlapping <- dplyr::select(overlapping,
                                 Winner17,
                                 Con17,
                                 Lab17,
                                 LD17,
                                 UKIP17,
                                 Green17)
    summarize(overlapping,
              ConWinner1 = any(Winner17 == "Conservative"),
              ConWinner2 = mean(Winner17 == "Conservative"),
              Con17 = mean(Con17),
              Lab17 = mean(Lab17),
              LD17 = mean(LD17),
              UKIP17 = mean(UKIP17),
              Green17 = mean(Green17),
              ConMaj = Con17 - pmax(Lab17, LD17, UKIP17, Green17))
}))

### Tag these columns as belonging to the any/all matching routine
aux <- setNames(aux, paste0(names(aux), ".allm"))

### Rows of aux are in the same order as rows of dat, so bind
### column-wise
dat <- cbind(dat, aux)

### ###############################################
### Primary match
### ###############################################

### Assign each town to a single constituency: the one whose
### intersection with the town has the largest area.

### Repair invalid constituency geometries before computing the
### geometric intersection.
wmin <- st_make_valid(wmin)
### Town x constituency intersection geometries; this object is reused
### by the averaging section further down.
inter <- st_intersection(dat, wmin)

### Group by town, compute the area of each town/constituency
### overlap, and keep the overlap(s) with the largest area.
### NOTE(review): if two overlaps tie for the maximum area, both rows
### are kept, which would duplicate that town in the merge below.
lu <- inter %>%
    group_by(ONSCode, Town) %>%
    dplyr::select(ONSCode, Town, pcon17cd, pcon17nm, geometry) %>%
    mutate(ar = st_area(geometry)) %>%
    filter(ar == max(ar, na.rm = TRUE))

### Reduce to a town -> constituency lookup.
### NOTE(review): lu is still grouped by ONSCode and Town at this
### point, so dplyr may carry Town along in this select -- confirm the
### resulting columns.
lu <- lu %>%
    st_drop_geometry() %>%
    dplyr::select(ONSCode, pcon17cd)

### Look up the 2017 results for the chosen constituency, adding a 0/1
### Conservative-winner indicator and the Conservative margin over the
### largest of Lab17, LD17, UKIP17 and Green17.
aux <- merge(lu,
             res2017 %>%
             dplyr::select(ONSConstID,
                           Con17,
                           Winner17,
                           Lab17,
                           LD17,
                           Green17,
                           UKIP17) %>%
             mutate(ConWinner1 = as.numeric(Winner17 == "Conservative"),
                    ConMaj = Con17 - pmax(Lab17, LD17, UKIP17, Green17)),
             by.x = "pcon17cd",
             by.y = "ONSConstID",
             all.x = TRUE,
             all.y = FALSE)

### Tag every column (including the merge key) with ".primary"
names(aux) <- paste0(names(aux), ".primary")

### Merge back onto the towns, keeping every town row
dat <- merge(dat, aux,
             by.x = "ONSCode",
             by.y = "ONSCode.primary",
             all.x = TRUE,
             all.y = FALSE)

### ###############################################
### Averaging
### ###############################################


### Area-weighted averaging: for each town, average the 2017 results
### of every constituency it overlaps, weighting each constituency by
### the area of its overlap with the town. `inter` is the
### town x constituency intersection computed in the primary-match
### section above.

### One row per town/constituency overlap, with the overlap area.
### No grouping is needed here: st_area() works row by row.
lu <- inter %>%
    dplyr::select(ONSCode, pcon17cd, pcon17nm, geometry) %>%
    mutate(ar = st_area(geometry)) %>%
    st_drop_geometry()

aux <- merge(lu,
             res2017,
             by.x = "pcon17cd",
             by.y = "ONSConstID",
             all.x = TRUE,
             all.y = FALSE) %>%
    dplyr::select(ONSCode,
                  ar,
                  Con17,
                  Winner17,
                  Lab17,
                  LD17,
                  Green17,
                  UKIP17) %>%
    ## ConWinner must be a per-row 0/1 indicator. Taking mean() at this
    ## point (before grouping) would give every row the same pooled
    ## value, making the weighted mean below identical for all towns.
    ## ar is stripped of its units so it can be used as a plain weight.
    mutate(ConWinner = as.numeric(Winner17 == "Conservative"),
           ar = as.numeric(ar)) %>%
    dplyr::select(-Winner17) %>%
    group_by(ONSCode) %>%
    ## ConWinner becomes the share of the town's overlap area that lies
    ## in constituencies won by the Conservatives.
    summarize(Con17 = weighted.mean(Con17, ar),
              ConWinner = weighted.mean(ConWinner, ar),
              Lab17 = weighted.mean(Lab17, ar),
              LD17 = weighted.mean(LD17, ar),
              Green17 = weighted.mean(Green17, ar),
              UKIP17 = weighted.mean(UKIP17, ar)) %>%
    ## Conservative margin over the largest of the other four
    ## area-weighted figures
    mutate(ConMaj = Con17 - pmax(Lab17, LD17, UKIP17, Green17))


### Tag every column (including the merge key) with ".weighted"
names(aux) <- paste0(names(aux), ".weighted")

### Merge back onto the towns, keeping every town row
dat <- merge(dat, aux,
             by.x = "ONSCode",
             by.y = "ONSCode.weighted",
             all.x = TRUE,
             all.y = FALSE)


### Write out the town-level data, which now carries the .allm,
### .primary and .weighted constituency summaries
saveRDS(dat, file = "../working/selection_data.rds")

